library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
#library(corrplot)
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
#source("~/GitHub/FRESA.CAD/R/PoissonEventRiskCalibration.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
#pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)
data(cancer)
colon <- subset(colon,etype==1)
colon$etype <- NULL
rownames(colon) <- colon$id
colon$id <- NULL
colon <- colon[complete.cases(colon),]
time <- colon$time
status <- colon$status
data <- colon
data$time <- NULL
data$study <- NULL
table(data$status)

0 1 442 446

dataColon <- as.data.frame(model.matrix(status~.*age,data))
dataColon$`(Intercept)` <- NULL
dataColon$time <- time/365
dataColon$status <- status
colnames(dataColon) <-str_replace_all(colnames(dataColon),":","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\.","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\+","_")
data <- NULL

trainsamples <- sample(nrow(dataColon),0.7*nrow(dataColon))
dataColonTrain <- dataColon[trainsamples,]
dataColonTest <- dataColon[-trainsamples,]


pander::pander(table(dataColonTrain$status))
0 1
308 313
pander::pander(table(dataColonTest$status))
0 1
134 133

Modeling

ml <- BSWiMS.model(Surv(time,status)~1,data=dataColonTrain,NumberofRepeats = 10)

[+++-+++++++-+++-+++-+++-+++-+++-+++-+++-]…

sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower HR upper u.Accuracy
age_nodes 5.32e-04 1.000 1.001 1.001 0.615
node4 2.97e-01 1.208 1.345 1.498 0.610
age_node4 2.54e-03 1.002 1.003 1.004 0.610
extent 2.29e-01 1.120 1.257 1.411 0.560
rxLev_5FU_age -2.79e-03 0.996 0.997 0.999 0.565
rxLev_5FU -8.99e-02 0.866 0.914 0.965 0.565
adhere 1.42e-01 1.033 1.153 1.287 0.538
nodes 8.50e-03 1.002 1.009 1.015 0.617
age_adhere -9.51e-04 0.998 0.999 1.000 0.538
age_extent 1.96e-10 1.000 1.000 1.000 0.535
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
age_nodes 0.523 0.614 0.616 0.523 0.616
node4 0.607 0.625 0.612 0.607 0.626
age_node4 0.590 0.607 0.612 0.589 0.609
extent 0.610 0.625 0.557 0.612 0.626
rxLev_5FU_age 0.618 0.625 0.564 0.620 0.626
rxLev_5FU 0.609 0.607 0.564 0.611 0.609
adhere 0.614 0.614 0.541 0.615 0.615
nodes 0.614 0.625 0.618 0.615 0.626
age_adhere 0.614 0.615 0.541 0.615 0.616
age_extent 0.504 0.535 0.534 0.500 0.534
  IDI NRI z.IDI z.NRI Delta.AUC Frequency
age_nodes 0.03421 0.451 6.32 6.13 0.092524 1.0
node4 0.03346 0.435 5.45 6.26 0.019414 1.0
age_node4 0.03106 0.417 5.33 6.04 0.019620 1.0
extent 0.01940 0.221 3.89 4.02 0.014547 1.0
rxLev_5FU_age 0.01388 0.255 3.52 3.42 0.006196 1.0
rxLev_5FU 0.01245 0.255 3.26 3.42 -0.001775 1.0
adhere 0.00658 0.189 2.53 3.24 -0.000363 0.7
nodes 0.00396 0.155 2.37 2.02 0.010566 1.0
age_adhere 0.00525 0.402 2.25 5.58 0.001831 0.1
age_extent 0.00569 0.137 2.05 1.72 0.034241 0.1

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years

index <- predict(ml,dataColonTrain)
timeinterval <- 2*mean(subset(dataColonTrain,status==1)$time)

h0 <- sum(dataColonTrain$status & dataColonTrain$time <= timeinterval)
h0 <- h0/sum((dataColonTrain$time > timeinterval) | (dataColonTrain$status==1))

rdata <- cbind(dataColonTrain$status,ppoisGzero(index,h0))

rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
                     timetoEvent=dataColonTrain$time,
                     title="Raw Train: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$OERatio),caption="O/E Ratio")
O/E Ratio
est lower upper
0.971 0.866 1.08
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Ratio")
O/E Ratio
mean 50% 2.5% 97.5%
1.49 1.49 1.46 1.51
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Ratio")
O/Acum Ratio
mean 50% 2.5% 97.5%
1.36 1.36 1.36 1.37
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.661 0.661 0.631 0.69
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.693 0.652 0.734
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.307 0.256 0.361
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.86 0.931
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.476
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.7 1.48 1.96
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 67.400462 on 1 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 494 217 267.8 9.64 67.4
class=1 127 96 45.2 57.16 67.4

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,dataColonTrain,"status","time")

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.688 1.52 2.88

The RRplot() of the calibrated model

h0 <- calprob$h0
timeinterval <- calprob$timeInterval;

rdata <- cbind(dataColonTrain$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
                     timetoEvent=dataColonTrain$time,
                     title="Calibrated Train: Colon",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Calibrated Train Performance

pander::pander(t(rrAnalysisTrain$OERatio),caption="O/E Ratio")
O/E Ratio
est lower upper
0.773 0.69 0.864
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Ratio")
O/E Ratio
mean 50% 2.5% 97.5%
0.966 0.966 0.952 0.981
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Ratio")
O/Acum Ratio
mean 50% 2.5% 97.5%
1.05 1.05 1.05 1.05
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.661 0.661 0.632 0.689
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.693 0.652 0.734
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.307 0.256 0.361
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.86 0.931
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.625
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.7 1.48 1.96
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 67.400462 on 1 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 494 217 267.8 9.64 67.4
class=1 127 96 45.2 57.16 67.4

Evaluating on the test set

The calibrated h0 and timeinterval were estimated on the training set

index <- predict(ml,dataColonTest)
rdata <- cbind(dataColonTest$status,ppoisGzero(index,h0))

rrAnalysisTest <- RRPlot(rdata,atThr = rrAnalysisTrain$thr_atP,
                     timetoEvent=dataColonTest$time,
                     title="Test: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Test Performance

pander::pander(t(rrAnalysisTest$OERatio),caption="O/E Ratio")
O/E Ratio
est lower upper
0.755 0.632 0.895
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Ratio")
O/E Ratio
mean 50% 2.5% 97.5%
0.916 0.916 0.889 0.943
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Ratio")
O/Acum Ratio
mean 50% 2.5% 97.5%
1.07 1.07 1.06 1.08
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.665 0.665 0.618 0.712
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.692 0.629 0.755
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.211 0.145 0.29
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.896 0.831 0.942
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.625
pander::pander(t(rrAnalysisTest$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.45 1.12 1.87
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 10.892022 on 1 degrees of freedom, p = 0.000966
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 225 105 117.3 1.28 10.9
class=1 42 28 15.7 9.53 10.9

Cross-Validation

Here we will cross validate the training set and evaluate also on the testing set. The h0 and the timeinterval are the ones estimated on the calibration process

rcv <- randomCV(theData=dataColonTrain,
                theOutcome = Surv(time,status)~1,
                fittingFunction=BSWiMS.model, 
                trainFraction = 0.75,
                repetitions=50,
                classSamplingType = "Pro",
                testingSet=dataColonTest
         )

.[++++++].[++++++].[+++-].[+++].[++++].[++++].[+++-].[+++].[++++].[++++]10 Tested: 850 Avg. Selected: 9 Min Tests: 1 Max Tests: 10 Mean Tests: 4.976471 . MAD: 0.473827

.[++++].[++-].[+++–].[++++].[+++-].[++++-].[++++].[+++++].[+++-].[+++]20 Tested: 886 Avg. Selected: 9 Min Tests: 1 Max Tests: 20 Mean Tests: 9.548533 . MAD: 0.4725908

.[++].[+++-].[++++].[+++-].[+++++].[+++-].[++++].[+++-].[++++].[+++-]30 Tested: 888 Avg. Selected: 8.733333 Min Tests: 1 Max Tests: 30 Mean Tests: 14.29054 . MAD: 0.4726087

.[++].[++++-].[++++-].[+++].[++-].[+++++++].[+++++++].[++++].[+++-].[++++]40 Tested: 888 Avg. Selected: 8.65 Min Tests: 3 Max Tests: 40 Mean Tests: 19.05405 . MAD: 0.4729596

.[+++-].[+++].[+++].[++++-].[+++-].[++++-].[+++].[+++-].[+++-].[++++-]50 Tested: 888 Avg. Selected: 8.6 Min Tests: 4 Max Tests: 50 Mean Tests: 23.81757 . MAD: 0.4729532

stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]

bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)

rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names

rrAnalysisCVTest <- RRPlot(rdatacv,atThr = rrAnalysisTrain$thr_atP,
                     timetoEvent=times,
                     title="CV Test: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

CV Test Performance

pander::pander(t(rrAnalysisCVTest$OERatio),caption="O/E Ratio")
O/E Ratio
est lower upper
0.747 0.679 0.819
pander::pander(t(rrAnalysisCVTest$OE95ci),caption="O/E Ratio")
O/E Ratio
mean 50% 2.5% 97.5%
0.885 0.885 0.873 0.897
pander::pander(t(rrAnalysisCVTest$OAcum95ci),caption="O/Acum Ratio")
O/Acum Ratio
mean 50% 2.5% 97.5%
0.999 0.999 0.997 1
pander::pander(rrAnalysisCVTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.653 0.653 0.627 0.677
pander::pander(t(rrAnalysisCVTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.68 0.645 0.715
pander::pander((rrAnalysisCVTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.314 0.271 0.359
pander::pander((rrAnalysisCVTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.867 0.831 0.897
pander::pander(t(rrAnalysisCVTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.625
pander::pander(t(rrAnalysisCVTest$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.58 1.4 1.79
pander::pander(rrAnalysisCVTest$surdif,caption="Logrank test")
Logrank test Chisq = 72.333030 on 1 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 689 306 372.4 11.8 72.3
class=1 199 140 73.6 59.9 72.3